#https://www.qiushibaike.com/8hr/page/2/
#https://www.qiushibaike.com/8hr/page/3/
#https://www.qiushibaike.com/text/page/2/
import requests
import re
# ret=requests.get('https://www.qiushibaike.com/')
# result=ret.content.decode()  # result is a str
# print(result)
# temp='abcd123ef456'
# ret=re.search('\d+',temp)  # returns a Match object (or None), not a list
# print(ret)
# URL template for the listing pages; '{}' is filled with the page number.
start_url = 'https://www.qiushibaike.com/8hr/page/{}/'

# Request headers sent with every page fetch (a desktop Chrome User-Agent).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.149 Safari/537.36'
    ),
}
# Pattern that captures the text of the first <span> inside each
# <div class="content"> element. Compiled once, outside the page loop.
# re.S makes '.' match newlines as well, so a match may span several lines.
_CONTENT_RE = re.compile(
    r'<div class="content">.*?<span>(.*?)</span>.*?</div>', re.S)

# Fetch listing pages 1..13 and extract the captured text from each page.
for j in range(13):
    url = start_url.format(j + 1)  # range() is 0-based; page numbers start at 1
    # requests applies no timeout by default, so without one a stalled
    # connection would block the script forever.
    ret = requests.get(url, headers=headers, timeout=10)
    result = ret.content.decode()  # decode the response bytes into a str
    # print(result)
    content = _CONTENT_RE.findall(result)  # list of captured strings
    # Disabled save step: append this page's text to a file ('a' = append mode).
    # with open('糗事.txt','a',encoding='utf-8') as f:
    #     for i in content:
    #         #print(i)
    #         k=re.sub('<br/>','',i)  # replace <br/> with an empty string
    #         f.write(k)  # write the cleaned text
    # print('正在保存第%d页'%(j+1))